bitkeeper revision 1.732 (403351fdWpb7sTMMhh7hUEOPzmRvoQ)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Wed, 18 Feb 2004 11:52:29 +0000 (11:52 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Wed, 18 Feb 2004 11:52:29 +0000 (11:52 +0000)
hypervisor.h, hypervisor.c, traps.c, entry.S:
  Fix failsafe handling and LDT-trap handling.

xen/arch/i386/traps.c
xenolinux-2.4.24-sparse/arch/xeno/kernel/entry.S
xenolinux-2.4.24-sparse/arch/xeno/kernel/traps.c
xenolinux-2.4.24-sparse/arch/xeno/mm/hypervisor.c
xenolinux-2.4.24-sparse/include/asm-xeno/hypervisor.h

index 86ba116b6d7d29ef251db89800c62bc4ade383c9..58ace4707eed733f4d668e8432d77daa061ba761 100644 (file)
@@ -317,16 +317,24 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
 {
     struct guest_trap_bounce *gtb = guest_trap_bounce+smp_processor_id();
     trap_info_t *ti;
-    unsigned long addr, fixup;
+    unsigned long off, addr, fixup;
     struct task_struct *p = current;
     extern int map_ldt_shadow_page(unsigned int);
 
     __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
 
-    if ( unlikely(addr > PAGE_OFFSET) )
-        goto fault_in_xen_space;
-
- propagate_fault:
+    if ( unlikely(addr >= LDT_VIRT_START) && 
+         (addr < (LDT_VIRT_START + (p->mm.ldt_ents*LDT_ENTRY_SIZE))) )
+    {
+        /*
+         * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
+         * send the fault up to the guest OS to be handled.
+         */
+        off  = addr - LDT_VIRT_START;
+        addr = p->mm.ldt_base + off;
+        if ( likely(map_ldt_shadow_page(off >> PAGE_SHIFT) == 0) )
+            return; /* successfully copied the mapping */
+    }
 
     if ( unlikely(!(regs->xcs & 3)) )
         goto fault_in_hypervisor;
@@ -341,20 +349,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
         clear_bit(EVENTS_MASTER_ENABLE_BIT, &p->shared_info->events_mask);
     return; 
 
-    /*
-     * FAULT IN XEN ADDRESS SPACE:
-     *  We only deal with one kind -- a fault in the shadow LDT mapping.
-     *  If this occurs we pull a mapping from the guest's LDT, if it is
-     *  valid. Otherwise we send the fault up to the guest OS to be handled.
-     */
- fault_in_xen_space:
-
-    if ( (addr < LDT_VIRT_START) || 
-         (addr >= (LDT_VIRT_START + (p->mm.ldt_ents*LDT_ENTRY_SIZE))) ||
-         map_ldt_shadow_page((addr - LDT_VIRT_START) >> PAGE_SHIFT) )
-        goto propagate_fault;
-    return;
-
  fault_in_hypervisor:
 
     if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
index 4f676eefc63beabfe5dfb6df5db5cf0959444823..9b1a77d4c1b23e133d02d02e551b517cc9d49acc 100644 (file)
@@ -367,10 +367,12 @@ critical_fixup_table:
 
 # Hypervisor uses this for application faults while it executes.
 ENTRY(failsafe_callback)
+        call SYMBOL_NAME(install_safe_pf_handler)
 1:      pop  %ds
 2:      pop  %es
 3:      pop  %fs
 4:      pop  %gs
+        call SYMBOL_NAME(install_normal_pf_handler)
 5:      iret
 .section .fixup,"ax";  \
 6:     movl $0,(%esp); \
@@ -470,34 +472,37 @@ ENTRY(alignment_check)
 
 # This handler is special, because it gets an extra value on its stack,
 # which is the linear faulting address.
-ENTRY(page_fault)
-       pushl %ds
-       pushl %eax
-       xorl %eax,%eax
-       pushl %ebp
-       pushl %edi
-       pushl %esi
-       pushl %edx
-       decl %eax                       # eax = -1
-       pushl %ecx
-       pushl %ebx
-       GET_CURRENT(%ebx)
-       cld
-       movl %es,%ecx
-       movl ORIG_EAX(%esp), %esi       # get the error code
-       movl ES(%esp), %edi             # get the faulting address
-       movl %eax, ORIG_EAX(%esp)
-       movl %ecx, ES(%esp)
-       movl %esp,%edx
-        pushl %edi                      # push the faulting address
-       pushl %esi                      # push the error code
-       pushl %edx                      # push the pt_regs pointer
-       movl $(__KERNEL_DS),%edx
-       movl %edx,%ds
-       movl %edx,%es
-       call SYMBOL_NAME(do_page_fault)
-       addl $12,%esp
-       jmp ret_from_exception
+#define PAGE_FAULT_STUB(_name1, _name2)                                  \
+ENTRY(_name1)                                                            \
+       pushl %ds                                                      ; \
+       pushl %eax                                                     ; \
+       xorl %eax,%eax                                                 ; \
+       pushl %ebp                                                     ; \
+       pushl %edi                                                     ; \
+       pushl %esi                                                     ; \
+       pushl %edx                                                     ; \
+       decl %eax                      /* eax = -1 */                  ; \
+       pushl %ecx                                                     ; \
+       pushl %ebx                                                     ; \
+       GET_CURRENT(%ebx)                                              ; \
+       cld                                                            ; \
+       movl %es,%ecx                                                  ; \
+       movl ORIG_EAX(%esp), %esi      /* get the error code */        ; \
+       movl ES(%esp), %edi            /* get the faulting address */  ; \
+       movl %eax, ORIG_EAX(%esp)                                      ; \
+       movl %ecx, ES(%esp)                                            ; \
+       movl %esp,%edx                                                 ; \
+        pushl %edi                     /* push the faulting address */ ; \
+       pushl %esi                     /* push the error code */       ; \
+       pushl %edx                     /* push the pt_regs pointer */  ; \
+       movl $(__KERNEL_DS),%edx                                       ; \
+       movl %edx,%ds                                                  ; \
+       movl %edx,%es                                                  ; \
+       call SYMBOL_NAME(_name2)                                       ; \
+       addl $12,%esp                                                  ; \
+       jmp ret_from_exception                                         ;
+PAGE_FAULT_STUB(page_fault, do_page_fault)
+PAGE_FAULT_STUB(safe_page_fault, do_safe_page_fault)
 
 ENTRY(machine_check)
        pushl $0
index 0b172ff87dfd6bc9642d66c18d76ae028432283c..63288fc2826acd23e574c81a895018348f56836d 100644 (file)
@@ -59,6 +59,7 @@ asmlinkage void segment_not_present(void);
 asmlinkage void stack_segment(void);
 asmlinkage void general_protection(void);
 asmlinkage void page_fault(void);
+asmlinkage void safe_page_fault(void);
 asmlinkage void coprocessor_error(void);
 asmlinkage void simd_coprocessor_error(void);
 asmlinkage void alignment_check(void);
@@ -601,7 +602,6 @@ static trap_info_t trap_table[] = {
           3, __KERNEL_CS, (unsigned long)system_call                 },
     {  0, 0,           0, 0                           }
 };
-    
 
 
 void __init trap_init(void)
@@ -620,3 +620,65 @@ void __init trap_init(void)
 
     cpu_init();
 }
+
+
+/*
+ * install_safe_pf_handler / install_normal_pf_handler:
+ * 
+ * These are used within the failsafe_callback handler in entry.S to avoid
+ * taking a full page fault when reloading FS and GS. This is because FS and 
+ * GS could be invalid at pretty much any point while Xenolinux executes (we 
+ * don't set them to safe values on entry to the kernel). At *any* point Xen 
+ * may be entered due to a hardware interrupt --- on exit from Xen an invalid 
+ * FS/GS will cause our failsafe_callback to be executed. This could occur, 
+ * for example, while the mmu_update_queue is in an inconsistent state. This
+ * is disastrous because the normal page-fault handler touches the update
+ * queue!
+ * 
+ * Fortunately, within the failsafe handler it is safe to force DS/ES/FS/GS
+ * to zero if they cannot be reloaded -- at this point executing a normal
+ * page fault would not change this effect. The safe page-fault handler
+ * ensures this end result (blow away the selector value) without the dangers
+ * of the normal page-fault handler.
+ * 
+ * NB. Perhaps this can all go away after we have implemented writeable
+ * page tables. :-)
+ */
+
+asmlinkage void do_safe_page_fault(struct pt_regs *regs, 
+                                   unsigned long error_code,
+                                   unsigned long address)
+{
+    unsigned long fixup;
+
+    if ( (fixup = search_exception_table(regs->eip)) != 0 )
+    {
+        regs->eip = fixup;
+        return;
+    }
+
+    die("Unhandleable 'safe' page fault!", regs, error_code);
+}
+
+unsigned long install_safe_pf_handler(void)
+{
+    static trap_info_t safe_pf[] = { 
+        { 14, 0, __KERNEL_CS, (unsigned long)safe_page_fault },
+        {  0, 0,           0, 0                              }
+    };
+    unsigned long flags;
+    local_irq_save(flags);
+    HYPERVISOR_set_trap_table(safe_pf);
+    return flags; /* This is returned in %%eax */
+}
+
+__attribute__((regparm(3))) /* This function takes its arg in %%eax */
+void install_normal_pf_handler(unsigned long flags)
+{
+    static trap_info_t normal_pf[] = { 
+        { 14, 0, __KERNEL_CS, (unsigned long)page_fault },
+        {  0, 0,           0, 0                         }
+    };
+    HYPERVISOR_set_trap_table(normal_pf);
+    local_irq_restore(flags);
+}
index 6bc8baa47abf37b9774dd90f5711f31fd3a57f68..94592d63f1aa974fcfeac3ac00981be9f98dc53d 100644 (file)
@@ -86,8 +86,9 @@ static void DEBUG_disallow_pt_read(unsigned long va)
 void MULTICALL_flush_page_update_queue(void)
 {
     unsigned long flags;
+    unsigned int _idx;
     spin_lock_irqsave(&update_lock, flags);
-    if ( idx != 0 ) 
+    if ( (_idx = idx) != 0 ) 
     {
 #if MMU_UPDATE_DEBUG > 1
         printk("Flushing %d entries from pt update queue\n", idx);
@@ -95,24 +96,27 @@ void MULTICALL_flush_page_update_queue(void)
 #if MMU_UPDATE_DEBUG > 0
         DEBUG_allow_pt_reads();
 #endif
+        idx = 0;
+        wmb(); /* Make sure index is cleared first to avoid double updates. */
         queue_multicall2(__HYPERVISOR_mmu_update, 
                          (unsigned long)update_queue, 
-                         idx);
-        idx = 0;
+                         _idx);
     }
     spin_unlock_irqrestore(&update_lock, flags);
 }
 
 static inline void __flush_page_update_queue(void)
 {
+    unsigned int _idx = idx;
 #if MMU_UPDATE_DEBUG > 1
     printk("Flushing %d entries from pt update queue\n", idx);
 #endif
 #if MMU_UPDATE_DEBUG > 0
     DEBUG_allow_pt_reads();
 #endif
-    HYPERVISOR_mmu_update(update_queue, idx);
     idx = 0;
+    wmb(); /* Make sure index is cleared first to avoid double updates. */
+    HYPERVISOR_mmu_update(update_queue, _idx);
 }
 
 void _flush_page_update_queue(void)
index f8183fcfd77e68bd55872969ec7a92f31b1ee702..18dbe86ec1be07dec3160d3b9b07f8b6e9e39c77 100644 (file)
@@ -169,7 +169,11 @@ static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int count)
         "b" (req), "c" (count) : "memory" );
 
     if ( unlikely(ret < 0) )
+    {
+        extern void show_trace(unsigned long *);
+        show_trace(NULL);
         panic("Failed mmu update: %p, %d", req, count);
+    }
 
     return ret;
 }